***************************************************************Part I: Compustat

{
***Compustat: get characteristics for borrowers
*samle period: Jan 1950-Dec 2020
{
clear
set more off
cd "~/DataFolder"
use Compustat.dta  // Downloaded from WRDS

*Duplicates
destring gvkey, force replace
drop if gvkey==. | fyear==.
gsort gvkey fyear -datadate
duplicates tag gvkey fyear, g(dup)
duplicates drop gvkey fyear, force
drop dup

*Fix sich
gsort gvkey -fyear
by gvkey: replace sich=sich[_n-1] if sich==.
gsort gvkey fyear
by gvkey: replace sich=sich[_n-1] if sich==.
destring sic, force replace
replace sich=sic if sich==.
gen sich3=int(sich/10)
gen sich2=int(sich/100)
gen sich1=int(sich/1000)
drop if sich==.
duplicates drop gvkey fyear, force

*Fix naicsh
destring naics, replace
gsort gvkey -fyear
by gvkey: replace naicsh=naicsh[_n-1] if naicsh==.
sort gvkey fyear
by gvkey: replace naicsh=naicsh[_n-1] if naicsh==.
replace naicsh=naics if naicsh==. & naics!=.
duplicates drop gvkey fyear, force

tostring naicsh , replace

rename fyear year

*Get firm size weight in industry
gen naics2=substr(naicsh, 1,2)
gen naics3=substr(naicsh, 1,3)
bys naics3 year: egen total_at_naics3=sum(at)
gen size_weight_naics3=at/total_at_naics3
egen size_qtl=xtile(at), n(10) by(naics3 year)
save compustat_cleaned, replace

*save
duplicates drop gvkey, force
keep gvkey  
save compustat_gvkey, replace
}
*



***DescalSacn Link Table: Borrowers to Compustat
*This link table help link borrowers in DealScan to Compustat using gvkey.

{
clear
set more off
clear 
set more off

cd "~/DataFolder"
import excel using ds_cs_link_April_2018_post.xlsx, firstrow sheet(link_data)
keep facid bcoid gvkey 
save link_toborrower, replace
}



***crosswalk_bhc from Matthieu_Gomez AND SchwertMichael2018 (Updated 2022-06-24)
*This link table helps link lenders in DealScan to BY9C which include banks' information using rssdid.
*Sample period: 1986-2019
{
clear 
set more off
cd "~/DataFolder"
use lcoid_rssd9001_cs_updatedJune2022.dta , clear // From Matthieu_Gomez AND SchwertMichael2018 (Updated 2022-06-24)
rename lcoid lenderid 
rename rssd9001 rssdid  

order rssdid year lenderid matching
tabulate year
bysort year: distinct rssdid
sort rssdid year lenderid
save lenderid_rssdid_crosswalk_updated20220624, replace

keep rssdid 
rename rssdid rssdhcr 
duplicates drop rssdhcr, force 
save BankList_updated20220624, replace
*/
}
*
}
*
